'''
import requests
#目标网站：QQ新闻
url = 'http://news.qq.com/'

#对网站发出请求
res = requests.get(url)
#获取网站的文本信息
print(res.text)

'''


#BeautifulSoup练习
from bs4 import BeautifulSoup
# Sample document used by every selection exercise below. Adjacent string
# literals are concatenated at compile time, so this is a single string with
# no newlines between the tags.
html_sample = (
    '<html>'
    '<body>'
    '<h1 id="title">Hello World</h1>'
    '<a href="#" class="link">This is link1</a>'
    '<a href="# link2" class="link">This is link2</a>'
    '</body>'
    '</html>'
)

# 'html.parser' names the parser backend BeautifulSoup should use.
soup = BeautifulSoup(html_sample, 'html.parser')

# Select elements by tag name and print the matches.
header = soup.select('h1')
print(header)
alink = soup.select('a')
print(alink)

# Drill into the matches and print only their text content.
# `header` already holds the 'h1' matches, so it is reused instead of
# querying the document a second time.
print(header[0].text)
for link in alink:
    print(link.text)

# Blank line separating the tag-name output from the id/class output.
print()

# Select by id ('#title'). The result gets its own name and is printed;
# previously it was assigned over `alink` and then never used, so this step
# of the exercise produced no output.
title_matches = soup.select('#title')
print(title_matches)

# Select by CSS class ('.link') and print each matching element.
for link in soup.select('.link'):
    print(link)

# Read the 'href' and 'class' attributes of every <a> tag via item access.
# NOTE: Beautiful Soup treats 'class' as a multi-valued attribute, so
# link['class'] prints a list rather than a plain string.
alinks = soup.select('a')
for link in alinks:
    print(link['href'])
    print(link['class'])
